# -*- coding: utf-8 -*-
# @Time    : 2021/6/7 14:49
# @Author  : 万方名
# @FileName: 文本向量化.py

from numpy import array
from numpy import argmax
from keras.utils import to_categorical

from sklearn.preprocessing import LabelEncoder

# Example: turn a list of string tokens into one-hot vectors and back.
# String labels cannot be one-hot encoded directly; they are first mapped
# to integer class ids.
data = ['你', '谁', '是']
values = array(data)
print(values)

# Integer-encode the labels. LabelEncoder numbers the distinct labels
# 0..n_classes-1 following the sorted order of the labels, and remembers
# the mapping so it can be reversed later.
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(values)

# NOTE: if the input already consists of non-negative integer class ids
# (e.g. [1, 3, 2, 0, 3, 2, 2, 1, 0, 1]), the LabelEncoder step is not
# needed: array(data) can be passed straight to to_categorical.

# One-hot encode: one row per sample, one column per class id.
encoded = to_categorical(integer_encoded)
print(encoded)

# Invert the encoding of the first sample. argmax only recovers the integer
# class id; the fitted LabelEncoder is needed to map that id back to the
# original string label.
inverted = argmax(encoded[0])
inverted_label = label_encoder.inverse_transform([inverted])[0]
print(inverted_label)
